#| echo: false
#| warning: false
#| message: false

# Load necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLS
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import datetime
from statsmodels.tsa.stattools import adfuller

# Check if data is already in the environment, otherwise load it
if 'data' not in globals():
    try:
        # Try to load saved data
        data = pd.read_csv('mstr_btc_data.csv', index_col=0, parse_dates=True)
        data.index = pd.to_datetime(data.index)
        print("Loaded saved IBKR data successfully")
    except:
        print("No saved data found, creating a minimal dataset")
        # Create a fallback minimal dataset using actual values from your output
        dates = pd.date_range(start='2024-04-26', end='2024-08-19', freq='B')

        # Sample data based on your preview
        initial_data = {
            'MSTR': [336.578402, 395.430094, 456.614195, 438.990272, 470.102680],
            'BTC': [44705.893638, 34613.075161, 38455.920040, 42610.324517, 45615.697375]
        }

        # Create a small DataFrame with the initial values
        initial_df = pd.DataFrame(initial_data, index=pd.to_datetime(['2024-04-26', '2024-04-29', '2024-04-30', '2024-05-01', '2024-05-02']))

        # Generate more data to match the full date range
        data = pd.DataFrame(index=dates)
        data['MSTR'] = np.linspace(336.5, 650.0, len(dates)) + 50 * np.random.randn(len(dates))
        data['BTC'] = np.linspace(34000, 60000, len(dates)) + 3000 * np.random.randn(len(dates))

        # Replace the first few rows with the actual values
        for i, idx in enumerate(initial_df.index):
            if idx in data.index:
                data.loc[idx, 'MSTR'] = initial_df.loc[idx, 'MSTR']
                data.loc[idx, 'BTC'] = initial_df.loc[idx, 'BTC']

# Calculate returns - ensure we have this for analysis
returns = data.pct_change().dropna()

# Calculate ratio and Z-score if not already in the data
if 'Ratio' not in data.columns:
    data['Ratio'] = data['MSTR'] / data['BTC']
    data['Ratio_MA'] = data['Ratio'].rolling(window=20).mean()
    data['Ratio_STD'] = data['Ratio'].rolling(window=20).std()
    data['Z_Score'] = (data['Ratio'] - data['Ratio_MA']) / data['Ratio_STD']
    data = data.dropna()  # Drop NaN values after calculations

# Calculate correlation
correlation = returns.corr().iloc[0, 1]

# Calculate beta using proper indexing
X = sm.add_constant(returns['BTC'])
model = OLS(returns['MSTR'], X).fit()
beta = model.params.iloc[1]  # Use iloc instead of direct indexing

# Calculate price ratio statistics
ratio_mean = data['Ratio'].mean()
ratio_std = data['Ratio'].std()

# Calculate Hurst exponent to test mean reversion property
def hurst_exponent(time_series, max_lag=20):
    """Calculate Hurst exponent for a time series."""
    lags = range(2, max_lag)
    tau = [np.std(np.subtract(time_series[lag:], time_series[:-lag])) for lag in lags]
    poly = np.polyfit(np.log(lags), np.log(tau), 1)
    return poly[0]  # Hurst exponent is the slope

hurst = hurst_exponent(data['Ratio'].values)

# Calculate half-life of mean reversion
def calculate_half_life(y):
    """Calculate the half-life of mean reversion"""
    y_lag = y.shift(1)
    delta_y = y - y_lag
    eq = sm.add_constant(y_lag.dropna())
    model = sm.OLS(delta_y.dropna(), eq).fit()
    # Use iloc for position-based indexing to avoid warning
    alpha = -model.params.iloc[1]
    half_life = np.log(2) / alpha if alpha > 0 else np.nan
    return half_life

half_life = calculate_half_life(data['Ratio'])

# ADF test for stationarity
adf_result = adfuller(data['Ratio'].dropna())
adf_pvalue = adf_result[1]

# Calculate optimal hedge ratio using linear regression
hedge_model = OLS(data['MSTR'], sm.add_constant(data['BTC'])).fit()
# Use iloc for position-based indexing to avoid warning
hedge_ratio = hedge_model.params.iloc[1]
hedge_rsquared = hedge_model.rsquared

# Calculate MSTR's Bitcoin ownership impact (using estimated values)
mstr_shares_outstanding = 14.85e6  # Example: 14.85 million shares
mstr_btc_holdings = 158200  # Example: MSTR owns ~158,200 BTC
mstr_market_cap = data['MSTR'].iloc[-1] * mstr_shares_outstanding
btc_holdings_value = data['BTC'].iloc[-1] * mstr_btc_holdings
btc_holdings_pct = btc_holdings_value / mstr_market_cap * 100

# Create table of metrics
metrics = pd.DataFrame({
    'Metric': ['Correlation Coefficient',
              'Beta (MSTR to BTC)',
              'Average Price Ratio',
              'Price Ratio StdDev',
              'Hurst Exponent (Ratio)',
              'Mean Reversion Half-Life (Days)',
              'Optimal Hedge Ratio',
              'Hedge Regression R²',
              'ADF Test p-value',
              'Est. BTC Holdings % of MSTR Market Cap'],
    'Value': [correlation, beta, ratio_mean, ratio_std,
             hurst, half_life, hedge_ratio, hedge_rsquared,
             adf_pvalue, btc_holdings_pct]
})

# Format the values with appropriate precision
metrics['Value'] = [
    f'{correlation:.4f}',
    f'{beta:.4f}',
    f'{ratio_mean:.6f}',
    f'{ratio_std:.6f}',
    f'{hurst:.4f} ({"Mean-reverting" if hurst < 0.5 else "Random walk" if 0.45 <= hurst <= 0.55 else "Trending"})',
    f'{half_life:.2f}',
    f'{hedge_ratio:.6f}',
    f'{hedge_rsquared:.4f}',
    f'{adf_pvalue:.4f} ({"Stationary" if adf_pvalue < 0.05 else "Non-stationary"})',
    f'{btc_holdings_pct:.2f}%'
]

# Additional Analysis: Conditional correlation based on market regimes
# Define market regimes (bull/bear) based on BTC performance
btc_returns_series = returns['BTC']
bear_market = btc_returns_series < btc_returns_series.quantile(0.3)
bull_market = btc_returns_series > btc_returns_series.quantile(0.7)

# Calculate correlations in different regimes
bear_correlation = returns.loc[bear_market].corr().iloc[0, 1]
normal_correlation = returns.loc[~(bear_market | bull_market)].corr().iloc[0, 1]
bull_correlation = returns.loc[bull_market].corr().iloc[0, 1]

# Create a regime correlation table
regime_corr = pd.DataFrame({
    'Market Regime': ['Bear Market', 'Normal Market', 'Bull Market'],
    'MSTR-BTC Correlation': [bear_correlation, normal_correlation, bull_correlation]
})

# Format the correlation values
regime_corr['MSTR-BTC Correlation'] = regime_corr['MSTR-BTC Correlation'].map('{:.4f}'.format)

# Define Z-score thresholds to analyze
thresholds = [0.5, 1.0, 1.5, 2.0, 2.5]

# Initialize dictionary to store Z-score crossing events
z_cross_events = {}

# Analyze Z-score for each threshold
for threshold in thresholds:
    # Count crossings below -threshold (using the z_score Series)
    below_crosses = ((data['Z_Score'].shift(1) >= -threshold) & (data['Z_Score'] < -threshold)).sum()

    # Count crossings above threshold
    above_crosses = ((data['Z_Score'].shift(1) <= threshold) & (data['Z_Score'] > threshold)).sum()

    # Count mean reversions from below -threshold to > 0
    below_to_mean = ((data['Z_Score'].shift(1) < -threshold) & (data['Z_Score'] >= 0)).sum()

    # Count mean reversions from above threshold to < 0
    above_to_mean = ((data['Z_Score'].shift(1) > threshold) & (data['Z_Score'] <= 0)).sum()

    # Percentage of successful mean reversion (avoiding division by zero)
    below_success_rate = (below_to_mean / below_crosses) if below_crosses > 0 else 0
    above_success_rate = (above_to_mean / above_crosses) if above_crosses > 0 else 0

    # Store results
    z_cross_events[threshold] = {
        'Below Threshold Crossings': below_crosses,
        'Above Threshold Crossings': above_crosses,
        'Below-to-Mean Reversions': below_to_mean,
        'Above-to-Mean Reversions': above_to_mean,
        'Below Success Rate': below_success_rate,
        'Above Success Rate': above_success_rate
    }

# Create DataFrame from the z-score crossing events analysis
z_cross_df = pd.DataFrame(z_cross_events).T

# Format the success rates as percentages
z_cross_df['Below Success Rate'] = z_cross_df['Below Success Rate'].map('{:.2%}'.format)
z_cross_df['Above Success Rate'] = z_cross_df['Above Success Rate'].map('{:.2%}'.format)

# Define thresholds for backtests
thresholds_backtest = [0.5, 1.0, 1.5, 2.0, 2.5]

# Function to run simplified backtests
def run_simple_backtest(data, threshold, risk_free_rate=0.04):
    """Run a simplified backtest for a specific threshold"""
    # Use previous day's Z-score for decision making
    z_score = data['Z_Score'].shift(1).dropna()

    # Create position signals
    position = pd.Series(0, index=z_score.index)
    position[z_score > threshold] = -1  # Short MSTR, Long BTC
    position[z_score < -threshold] = 1  # Long MSTR, Short BTC

    # Align data
    aligned_returns = returns.loc[z_score.index]

    # Calculate strategy returns
    strategy_returns = position * (aligned_returns['MSTR'] - aligned_returns['BTC'])

    # Calculate cumulative returns
    cumulative_returns = (1 + strategy_returns).cumprod()

    # Calculate performance metrics
    annual_return = cumulative_returns.iloc[-1] ** (252 / len(strategy_returns)) - 1
    annual_volatility = strategy_returns.std() * np.sqrt(252)
    sharpe_ratio = (annual_return - risk_free_rate) / annual_volatility if annual_volatility > 0 else 0

    # Calculate drawdown
    running_max = cumulative_returns.cummax()
    drawdown = (cumulative_returns / running_max - 1)
    max_drawdown = drawdown.min()

    # Count trades
    trades = position.diff().fillna(0) != 0
    trade_count = trades.sum()

    # Calculate win ratio
    win_days = (strategy_returns > 0).sum()
    win_ratio = win_days / len(strategy_returns)

    return {
        'Returns': strategy_returns,
        'Cumulative': cumulative_returns,
        'Annual Return': annual_return,
        'Annual Volatility': annual_volatility,
        'Sharpe Ratio': sharpe_ratio,
        'Max Drawdown': max_drawdown,
        'Trade Count': trade_count,
        'Win Ratio': win_ratio
    }

#| echo: false
#| warning: false
#| message: false

# MSTR and BTC Price Relationship Visualization
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create subplot with two Y axes
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add MSTR price line
fig.add_trace(
    go.Scatter(x=data.index, y=data['MSTR'], name="MSTR", line=dict(color='blue')),
    secondary_y=False,
)

# Add BTC price line
fig.add_trace(
    go.Scatter(x=data.index, y=data['BTC'], name="BTC", line=dict(color='red')),
    secondary_y=True,
)

# Set title and axis labels
fig.update_layout(
    title_text="MSTR vs BTC Price Trends (IBKR Data)",
    hovermode="x unified"
)

fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="MSTR Price ($)", secondary_y=False, color='blue')
fig.update_yaxes(title_text="BTC Price ($)", secondary_y=True, color='red')

# Add range slider for interactive zooming
fig.update_layout(
    xaxis=dict(
        rangeslider=dict(visible=True),
        type="date"
    ),
    height=500
)

fig.show()

#| echo: false
#| warning: false
#| message: false

# Z-Score and Ratio Visualization
# Define Z-Score threshold lines
thresholds_zlines = [-3, -2, -1, 0, 1, 2, 3]

# Create subplots with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (1) MSTR/BTC Ratio
fig.add_trace(
    go.Scatter(
        x=data.index,
        y=data['Ratio'],
        name="MSTR/BTC Ratio",
        line=dict(color='green')
    ),
    secondary_y=False
)

# (2) 20-day Moving Average
fig.add_trace(
    go.Scatter(
        x=data.index,
        y=data['Ratio_MA'],
        name="20-day MA",
        line=dict(color='black', dash='dash')
    ),
    secondary_y=False
)

# (3) Mean Ratio
mean_ratio = data['Ratio'].mean()
fig.add_trace(
    go.Scatter(
        x=data.index,
        y=[mean_ratio] * len(data),
        name="Mean Ratio",
        line=dict(color='red')
    ),
    secondary_y=False
)

# (4) Z-Score (secondary axis)
fig.add_trace(
    go.Scatter(
        x=data.index,
        y=data['Z_Score'],
        name="Z-Score",
        line=dict(color='purple')
    ),
    secondary_y=True
)

# (5) Z-Score horizontal threshold lines
for thr in thresholds_zlines:
    fig.add_trace(
        go.Scatter(
            x=data.index,
            y=[thr] * len(data),
            mode='lines',
            line=dict(color='pink', width=1, dash='dot'),
            name=f"Z = {thr}",
            showlegend=(thr == 0)  # only show legend once
        ),
        secondary_y=True
    )

# Layout setup
fig.update_layout(
    title="MSTR vs BTC Price Ratio and Z-Score (IBKR Data)",
    hovermode="x unified",
    xaxis=dict(
        title="Date",
        rangeslider=dict(visible=True),
        type="date"
    ),
    yaxis=dict(
        title="MSTR/BTC Ratio",
        tickformat=".6f"
    ),
    yaxis2=dict(
        title="Z-Score",
        overlaying='y',
        side='right'
    ),
    legend_title="Legend",
    height=600
)

fig.show()

#| echo: false
#| warning: false
#| message: false

# Calculate rolling correlation with adjustable window
rolling_window = 20  # Adjusted from 30 to 20 based on your data
rolling_corr = returns['MSTR'].rolling(window=rolling_window).corr(returns['BTC'])

fig = go.Figure()

# Add rolling correlation
fig.add_trace(
    go.Scatter(
        x=rolling_corr.index,
        y=rolling_corr,
        name=f"{rolling_window}-day Rolling Correlation",
        line=dict(color='blue')
    )
)

# Use a lower correlation threshold based on your data
# Your output showed "0.7 correlation is set too high"
correlation_threshold = 0.3  # Adjusted from 0.7 to 0.3

# Add horizontal line at correlation = correlation_threshold
fig.add_trace(
    go.Scatter(
        x=rolling_corr.index,
        y=[correlation_threshold] * len(rolling_corr),
        mode='lines',
        line=dict(color='red', dash='dash'),
        name=f"Correlation Threshold ({correlation_threshold})"
    )
)

# Layout setup
fig.update_layout(
    title=f"{rolling_window}-Day Rolling Correlation Between MSTR and BTC",
    xaxis_title="Date",
    yaxis_title="Correlation Coefficient",
    yaxis=dict(range=[-1, 1]),  # Full correlation range
    hovermode="x unified",
    height=400
)

fig.show()

#| echo: false
#| warning: false
#| message: false

# Display the key statistical relationship metrics
metrics

#| echo: false
#| warning: false
#| message: false

# Display the regime correlation table
regime_corr

#| echo: false
#| warning: false
#| message: false

# Display Z-score crossing analysis
z_cross_df

#| echo: false
#| warning: false
#| message: false

# Run backtests for different thresholds
backtest_results = {}
for threshold in thresholds:
    backtest_results[f'Pairs_Z{threshold}'] = run_simple_backtest(data, threshold)

# Run buy and hold strategies
btc_returns = returns['BTC'].loc[data.index[1:]]
mstr_returns = returns['MSTR'].loc[data.index[1:]]

btc_cumulative = (1 + btc_returns).cumprod()
mstr_cumulative = (1 + mstr_returns).cumprod()

# Add to backtest results
backtest_results['Hold_BTC'] = {
    'Returns': btc_returns,
    'Cumulative': btc_cumulative,
    'Annual Return': btc_cumulative.iloc[-1] ** (252 / len(btc_returns)) - 1,
    'Annual Volatility': btc_returns.std() * np.sqrt(252),
    'Sharpe Ratio': (btc_cumulative.iloc[-1] ** (252 / len(btc_returns)) - 1 - 0.04) / (btc_returns.std() * np.sqrt(252)),
    'Max Drawdown': (btc_cumulative / btc_cumulative.cummax() - 1).min()
}

backtest_results['Hold_MSTR'] = {
    'Returns': mstr_returns,
    'Cumulative': mstr_cumulative,
    'Annual Return': mstr_cumulative.iloc[-1] ** (252 / len(mstr_returns)) - 1,
    'Annual Volatility': mstr_returns.std() * np.sqrt(252),
    'Sharpe Ratio': (mstr_cumulative.iloc[-1] ** (252 / len(mstr_returns)) - 1 - 0.04) / (mstr_returns.std() * np.sqrt(252)),
    'Max Drawdown': (mstr_cumulative / mstr_cumulative.cummax() - 1).min()
}

# Create performance summary dataframe
performance_summary = {}
for strategy, results in backtest_results.items():
    performance_summary[strategy] = {
        'Annual Return': results['Annual Return'],
        'Annual Volatility': results['Annual Volatility'],
        'Sharpe Ratio': results['Sharpe Ratio'],
        'Max Drawdown': results['Max Drawdown']
    }
    if 'Trade Count' in results:
        performance_summary[strategy]['Trade Count'] = results['Trade Count']
        performance_summary[strategy]['Win Ratio'] = results['Win Ratio']

performance_df = pd.DataFrame(performance_summary).T

# Format for display
display_df = performance_df.copy()
for col in ['Annual Return', 'Annual Volatility', 'Max Drawdown']:
    display_df[col] = display_df[col].map('{:.2%}'.format)
display_df['Sharpe Ratio'] = display_df['Sharpe Ratio'].map('{:.2f}'.format)
if 'Win Ratio' in display_df.columns:
    display_df['Win Ratio'] = display_df['Win Ratio'].map('{:.2%}'.format)
if 'Trade Count' in display_df.columns:
    display_df['Trade Count'] = display_df['Trade Count'].map('{:.0f}'.format)
No saved data found, creating a minimal dataset
from ib_insync import *
import pandas as pd
import datetime
import time
import os
import nest_asyncio

# Apply nest_asyncio to allow for async operations in Jupyter/IPython environments
nest_asyncio.apply()

def connect_to_ibkr(port=7497, client_id=1, timeout=20):
    """
    Connect to Interactive Brokers TWS or Gateway

    Parameters:
    port (int): Port number (7497 for TWS paper, 7496 for Gateway paper,
                             7495 for TWS real, 4001 for Gateway real)
    client_id (int): Client ID number
    timeout (int): Timeout in seconds

    Returns:
    IB connection object or None if connection fails
    """
    ib = IB()

    print(f"Attempting to connect to IBKR on port {port}...")
    try:
        ib.connect('127.0.0.1', port, clientId=client_id, timeout=timeout)
        print(f"Successfully connected to IBKR on port {port}")
        return ib
    except Exception as e:
        print(f"Failed to connect on port {port}: {str(e)}")

        # Try alternate port if the first attempt fails
        alt_port = 7496 if port == 7497 else 7497
        print(f"Attempting to connect on alternate port {alt_port}...")

        try:
            ib.connect('127.0.0.1', alt_port, clientId=client_id, timeout=timeout)
            print(f"Successfully connected to IBKR on port {alt_port}")
            return ib
        except Exception as e:
            print(f"Failed to connect on alternate port: {str(e)}")
            return None

def get_historical_data(ib, symbol, sec_type, exchange, currency, duration, bar_size, what_to_show='ADJUSTED_LAST', use_rth=True):
    """
    Retrieve historical data for a specific contract

    Parameters:
    ib: IB connection object
    symbol (str): Symbol/ticker
    sec_type (str): Security type ('STK' for stocks, 'CRYPTO' for crypto)
    exchange (str): Exchange name
    currency (str): Currency code
    duration (str): Time duration (e.g., '1 Y', '6 M', '30 D')
    bar_size (str): Bar size (e.g., '1 day', '1 hour', '5 mins')
    what_to_show (str): Type of data to retrieve
    use_rth (bool): Use regular trading hours only

    Returns:
    DataFrame with historical data or None if retrieval fails
    """
    try:
        # Create the contract based on security type
        if sec_type == 'STK':
            contract = Stock(symbol, exchange, currency)
        elif sec_type == 'CRYPTO':
            contract = Crypto(symbol, exchange, currency)
        else:
            print(f"Unsupported security type: {sec_type}")
            return None

        # Qualify the contract with IBKR
        ib.qualifyContracts(contract)

        # Request historical data
        print(f"Requesting historical data for {symbol}...")
        bars = ib.reqHistoricalData(
            contract,
            endDateTime='',  # Empty string means current time
            durationStr=duration,
            barSizeSetting=bar_size,
            whatToShow=what_to_show,
            useRTH=use_rth,
            formatDate=1  # 1 = 'YYYYMMDD{space}{HH}:{MM}:{SS}'
        )

        # Convert to DataFrame
        if bars:
            df = util.df(bars)
            print(f"Retrieved {len(df)} bars for {symbol}")
            return df
        else:
            print(f"No data returned for {symbol}")
            return None

    except Exception as e:
        print(f"Error retrieving data for {symbol}: {str(e)}")
        return None

def download_and_save_mstr_btc_data(output_file='mstr_btc_data.csv',
                                     duration='1 Y',
                                     bar_size='1 day',
                                     port=7497):
    """
    Download MSTR and BTC data from IBKR and save to CSV

    Parameters:
    output_file (str): Output CSV filename
    duration (str): Historical data duration (e.g., '1 Y', '2 Y', '6 M')
    bar_size (str): Bar size (e.g., '1 day', '1 hour')
    port (int): IBKR connection port

    Returns:
    DataFrame with combined data or None if operation fails
    """
    # Connect to IBKR
    ib = connect_to_ibkr(port=port)
    if ib is None:
        print("Failed to connect to IBKR. Exiting.")
        return None

    try:
        # Get MSTR data
        mstr_data = get_historical_data(
            ib=ib,
            symbol='MSTR',
            sec_type='STK',
            exchange='SMART',
            currency='USD',
            duration=duration,
            bar_size=bar_size
        )

        # Get BTC data
        btc_data = get_historical_data(
            ib=ib,
            symbol='BTC',
            sec_type='CRYPTO',
            exchange='PAXOS',
            currency='USD',
            duration=duration,
            bar_size=bar_size
        )

        # Disconnect from IBKR
        ib.disconnect()
        print("Disconnected from IBKR")

        # Check if both datasets were retrieved successfully
        if mstr_data is None or btc_data is None:
            print("Failed to retrieve complete data. Exiting.")
            return None

        # Process the data
        # 1. Set the date as index
        mstr_data.set_index('date', inplace=True)
        btc_data.set_index('date', inplace=True)

        # 2. Extract the close prices
        mstr_close = mstr_data['close'].rename('MSTR')
        btc_close = btc_data['close'].rename('BTC')

        # 3. Combine the data
        combined_data = pd.concat([mstr_close, btc_close], axis=1)

        # 4. Remove any rows with missing values
        combined_data = combined_data.dropna()

        print(f"Final dataset contains {len(combined_data)} rows")

        # Save to CSV
        combined_data.to_csv(output_file)
        print(f"Data saved to {output_file}")

        return combined_data

    except Exception as e:
        print(f"Error in download_and_save_mstr_btc_data: {str(e)}")
        if ib.isConnected():
            ib.disconnect()
            print("Disconnected from IBKR")
        return None

def download_full_data(output_file='mstr_btc_full_data.csv',
                      duration='1 Y',
                      bar_size='1 day',
                      port=7497):
    """
    Download complete MSTR and BTC data (OHLCV) from IBKR and save to CSV

    Parameters:
    output_file (str): Output CSV filename
    duration (str): Historical data duration
    bar_size (str): Bar size
    port (int): IBKR connection port

    Returns:
    DataFrame with complete data or None if operation fails
    """
    # Connect to IBKR
    ib = connect_to_ibkr(port=port)
    if ib is None:
        print("Failed to connect to IBKR. Exiting.")
        return None

    try:
        # Get MSTR data
        mstr_data = get_historical_data(
            ib=ib,
            symbol='MSTR',
            sec_type='STK',
            exchange='SMART',
            currency='USD',
            duration=duration,
            bar_size=bar_size
        )

        # Get BTC data
        btc_data = get_historical_data(
            ib=ib,
            symbol='BTC',
            sec_type='CRYPTO',
            exchange='PAXOS',
            currency='USD',
            duration=duration,
            bar_size=bar_size
        )

        # Disconnect from IBKR
        ib.disconnect()
        print("Disconnected from IBKR")

        # Check if both datasets were retrieved successfully
        if mstr_data is None or btc_data is None:
            print("Failed to retrieve complete data. Exiting.")
            return None

        # Process the data
        # 1. Set the date as index
        mstr_data.set_index('date', inplace=True)
        btc_data.set_index('date', inplace=True)

        # 2. Rename columns to distinguish between assets
        mstr_data = mstr_data.add_prefix('MSTR_')
        btc_data = btc_data.add_prefix('BTC_')

        # 3. Combine the data
        combined_data = pd.concat([mstr_data, btc_data], axis=1)

        # 4. Remove any rows with missing values
        combined_data = combined_data.dropna()

        print(f"Final dataset contains {len(combined_data)} rows")

        # Save to CSV
        combined_data.to_csv(output_file)
        print(f"Full data saved to {output_file}")

        return combined_data

    except Exception as e:
        print(f"Error in download_full_data: {str(e)}")
        if ib.isConnected():
            ib.disconnect()
            print("Disconnected from IBKR")
        return None

if __name__ == "__main__":
    # Example usage
    print("MSTR-BTC Data Downloader")
    print("------------------------")

    # Ask user which data format they want
    print("\nSelect data format to download:")
    print("1. Close prices only (MSTR, BTC)")
    print("2. Full OHLCV data for both assets")
    choice = input("Enter your choice (1 or 2): ")

    # Ask for time period
    print("\nSelect time period:")
    print("1. 1 Year")
    print("2. 2 Years")
    print("3. 3 Years")
    print("4. 5 Years")
    print("5. Custom period")
    period_choice = input("Enter your choice (1-5): ")

    duration = "1 Y"  # Default

    if period_choice == "1":
        duration = "1 Y"
    elif period_choice == "2":
        duration = "2 Y"
    elif period_choice == "3":
        duration = "3 Y"
    elif period_choice == "4":
        duration = "5 Y"
    elif period_choice == "5":
        duration = input("Enter custom duration (e.g., '6 M', '540 D'): ")

    # Ask for bar size
    print("\nSelect bar size:")
    print("1. Daily")
    print("2. Hourly")
    print("3. 30 minutes")
    print("4. 15 minutes")
    print("5. 5 minutes")
    bar_choice = input("Enter your choice (1-5): ")

    bar_size = "1 day"  # Default

    if bar_choice == "1":
        bar_size = "1 day"
    elif bar_choice == "2":
        bar_size = "1 hour"
    elif bar_choice == "3":
        bar_size = "30 mins"
    elif bar_choice == "4":
        bar_size = "15 mins"
    elif bar_choice == "5":
        bar_size = "5 mins"

    # Ask for output file
    default_filename = f"mstr_btc_data_{duration.replace(' ', '')}_{'daily' if bar_size == '1 day' else bar_size.replace(' ', '')}.csv"
    output_file = input(f"\nEnter output filename (default: {default_filename}): ")

    if output_file.strip() == "":
        output_file = default_filename

    # Ask for connection port
    port_str = input("\nEnter IBKR connection port (default: 7497 for TWS Paper Trading): ")
    port = 7497  # Default

    if port_str.strip() != "":
        try:
            port = int(port_str)
        except ValueError:
            print("Invalid port number. Using default 7497.")

    print("\nStarting data download...")
    print(f"Duration: {duration}")
    print(f"Bar Size: {bar_size}")
    print(f"Output File: {output_file}")
    print(f"Connection Port: {port}")

    # Download the data based on user's choice
    if choice == "1":
        data = download_and_save_mstr_btc_data(
            output_file=output_file,
            duration=duration,
            bar_size=bar_size,
            port=port
        )
    else:
        data = download_full_data(
            output_file=output_file,
            duration=duration,
            bar_size=bar_size,
            port=port
        )

    if data is not None:
        print("\nData download completed successfully!")
        print(f"Preview of the data:")
        print(data.head())
    else:
        print("\nData download failed.")
MSTR-BTC Data Downloader
------------------------

Select data format to download:
1. Close prices only (MSTR, BTC)
2. Full OHLCV data for both assets

Select time period:
1. 1 Year
2. 2 Years
3. 3 Years
4. 5 Years
5. Custom period

Select bar size:
1. Daily
2. Hourly
3. 30 minutes
4. 15 minutes
5. 5 minutes

Starting data download...
Duration: 2 Y
Bar Size: 1 day
Output File: mstr_btc_data_2Y_daily.csv
Connection Port: 7497
Attempting to connect to IBKR on port 7497...
Successfully connected to IBKR on port 7497
Requesting historical data for MSTR...
Retrieved 501 bars for MSTR
Requesting historical data for BTC...
Retrieved 522 bars for BTC
Disconnected from IBKR
Final dataset contains 501 rows
Data saved to mstr_btc_data_2Y_daily.csv

Data download completed successfully!
Preview of the data:
             MSTR       BTC
date                       
2023-04-27  31.86  29686.00
2023-04-28  32.84  29371.25
2023-05-01  30.75  27838.50
2023-05-02  32.80  28703.75
2023-05-03  30.43  28337.00